In [1]:
from IPython.display import Image
Image('img/FIFA 17.jpg')
Out[1]:
In [2]:
# To support both python 2 and python 3
from __future__ import division, print_function, unicode_literals

%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12

import warnings
warnings.filterwarnings('ignore')

General Dataset Information

In [3]:
df = pd.read_csv('data/FullData.csv')                      # contains full data, to be analyzed
df.head(5)
Out[3]:
Name Nationality National_Position National_Kit Club Club_Position Club_Kit Club_Joining Contract_Expiry Rating ... Long_Shots Curve Freekick_Accuracy Penalties Volleys GK_Positioning GK_Diving GK_Kicking GK_Handling GK_Reflexes
0 Cristiano Ronaldo Portugal LS 7.0 Real Madrid LW 7.0 07-01-2009 2021.0 94 ... 90 81 76 85 88 14 7 15 11 11
1 Lionel Messi Argentina RW 10.0 FC Barcelona RW 10.0 07-01-2004 2018.0 93 ... 88 89 90 74 85 14 6 15 11 8
2 Neymar Brazil LW 10.0 FC Barcelona LW 11.0 07-01-2013 2021.0 92 ... 77 79 84 81 83 15 9 15 9 11
3 Luis Suárez Uruguay LS 9.0 FC Barcelona ST 9.0 07-11-2014 2021.0 92 ... 86 86 84 85 88 33 27 31 25 37
4 Manuel Neuer Germany GK 1.0 FC Bayern GK 1.0 07-01-2011 2021.0 92 ... 16 14 11 47 11 91 89 95 90 89

5 rows × 53 columns

In [4]:
df.info()                     # tells us about type of objects in df
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17588 entries, 0 to 17587
Data columns (total 53 columns):
Name                  17588 non-null object
Nationality           17588 non-null object
National_Position     1075 non-null object
National_Kit          1075 non-null float64
Club                  17588 non-null object
Club_Position         17587 non-null object
Club_Kit              17587 non-null float64
Club_Joining          17587 non-null object
Contract_Expiry       17587 non-null float64
Rating                17588 non-null int64
Height                17588 non-null object
Weight                17588 non-null object
Preffered_Foot        17588 non-null object
Birth_Date            17588 non-null object
Age                   17588 non-null int64
Preffered_Position    17588 non-null object
Work_Rate             17588 non-null object
Weak_foot             17588 non-null int64
Skill_Moves           17588 non-null int64
Ball_Control          17588 non-null int64
Dribbling             17588 non-null int64
Marking               17588 non-null int64
Sliding_Tackle        17588 non-null int64
Standing_Tackle       17588 non-null int64
Aggression            17588 non-null int64
Reactions             17588 non-null int64
Attacking_Position    17588 non-null int64
Interceptions         17588 non-null int64
Vision                17588 non-null int64
Composure             17588 non-null int64
Crossing              17588 non-null int64
Short_Pass            17588 non-null int64
Long_Pass             17588 non-null int64
Acceleration          17588 non-null int64
Speed                 17588 non-null int64
Stamina               17588 non-null int64
Strength              17588 non-null int64
Balance               17588 non-null int64
Agility               17588 non-null int64
Jumping               17588 non-null int64
Heading               17588 non-null int64
Shot_Power            17588 non-null int64
Finishing             17588 non-null int64
Long_Shots            17588 non-null int64
Curve                 17588 non-null int64
Freekick_Accuracy     17588 non-null int64
Penalties             17588 non-null int64
Volleys               17588 non-null int64
GK_Positioning        17588 non-null int64
GK_Diving             17588 non-null int64
GK_Kicking            17588 non-null int64
GK_Handling           17588 non-null int64
GK_Reflexes           17588 non-null int64
dtypes: float64(3), int64(38), object(12)
memory usage: 7.1+ MB
In [5]:
df.describe()
Out[5]:
National_Kit Club_Kit Contract_Expiry Rating Age Weak_foot Skill_Moves Ball_Control Dribbling Marking ... Long_Shots Curve Freekick_Accuracy Penalties Volleys GK_Positioning GK_Diving GK_Kicking GK_Handling GK_Reflexes
count 1075.000000 17587.000000 17587.000000 17588.000000 17588.000000 17588.000000 17588.000000 17588.000000 17588.000000 17588.000000 ... 17588.000000 17588.000000 17588.000000 17588.000000 17588.000000 17588.000000 17588.000000 17588.000000 17588.000000 17588.000000
mean 12.219535 21.294536 2018.899414 66.166193 25.460314 2.934103 2.303161 57.972766 54.802877 44.230327 ... 47.403173 47.181146 43.383443 49.165738 43.275586 16.609620 16.823061 16.458324 16.559814 16.901183
std 6.933187 19.163741 1.698787 7.083012 4.680217 0.655927 0.746156 16.834779 18.913857 21.561703 ... 19.211887 18.464396 17.701903 15.871735 17.710839 17.139904 17.798052 16.600741 16.967256 18.034485
min 1.000000 1.000000 2017.000000 45.000000 17.000000 1.000000 1.000000 5.000000 4.000000 3.000000 ... 4.000000 6.000000 4.000000 7.000000 3.000000 1.000000 1.000000 1.000000 1.000000 1.000000
25% 6.000000 9.000000 2017.000000 62.000000 22.000000 3.000000 2.000000 53.000000 47.000000 22.000000 ... 32.000000 34.000000 31.000000 39.000000 30.000000 8.000000 8.000000 8.000000 8.000000 8.000000
50% 12.000000 18.000000 2019.000000 66.000000 25.000000 3.000000 2.000000 63.000000 60.000000 48.000000 ... 52.000000 48.000000 42.000000 50.000000 44.000000 11.000000 11.000000 11.000000 11.000000 11.000000
75% 18.000000 27.000000 2020.000000 71.000000 29.000000 3.000000 3.000000 69.000000 68.000000 64.000000 ... 63.000000 62.000000 57.000000 61.000000 57.000000 14.000000 14.000000 14.000000 14.000000 14.000000
max 36.000000 99.000000 2023.000000 94.000000 47.000000 5.000000 5.000000 95.000000 97.000000 92.000000 ... 91.000000 92.000000 93.000000 96.000000 93.000000 91.000000 89.000000 95.000000 91.000000 90.000000

8 rows × 41 columns

In [6]:
df.isnull().any()                            #checking null val. for each col
Out[6]:
Name                  False
Nationality           False
National_Position      True
National_Kit           True
Club                  False
Club_Position          True
Club_Kit               True
Club_Joining           True
Contract_Expiry        True
Rating                False
Height                False
Weight                False
Preffered_Foot        False
Birth_Date            False
Age                   False
Preffered_Position    False
Work_Rate             False
Weak_foot             False
Skill_Moves           False
Ball_Control          False
Dribbling             False
Marking               False
Sliding_Tackle        False
Standing_Tackle       False
Aggression            False
Reactions             False
Attacking_Position    False
Interceptions         False
Vision                False
Composure             False
Crossing              False
Short_Pass            False
Long_Pass             False
Acceleration          False
Speed                 False
Stamina               False
Strength              False
Balance               False
Agility               False
Jumping               False
Heading               False
Shot_Power            False
Finishing             False
Long_Shots            False
Curve                 False
Freekick_Accuracy     False
Penalties             False
Volleys               False
GK_Positioning        False
GK_Diving             False
GK_Kicking            False
GK_Handling           False
GK_Reflexes           False
dtype: bool
In [7]:
df.isnull().sum()                                  #checking null val. for each col
Out[7]:
Name                      0
Nationality               0
National_Position     16513
National_Kit          16513
Club                      0
Club_Position             1
Club_Kit                  1
Club_Joining              1
Contract_Expiry           1
Rating                    0
Height                    0
Weight                    0
Preffered_Foot            0
Birth_Date                0
Age                       0
Preffered_Position        0
Work_Rate                 0
Weak_foot                 0
Skill_Moves               0
Ball_Control              0
Dribbling                 0
Marking                   0
Sliding_Tackle            0
Standing_Tackle           0
Aggression                0
Reactions                 0
Attacking_Position        0
Interceptions             0
Vision                    0
Composure                 0
Crossing                  0
Short_Pass                0
Long_Pass                 0
Acceleration              0
Speed                     0
Stamina                   0
Strength                  0
Balance                   0
Agility                   0
Jumping                   0
Heading                   0
Shot_Power                0
Finishing                 0
Long_Shots                0
Curve                     0
Freekick_Accuracy         0
Penalties                 0
Volleys                   0
GK_Positioning            0
GK_Diving                 0
GK_Kicking                0
GK_Handling               0
GK_Reflexes               0
dtype: int64

Heatmap Analysis

if corr ~ 1, then 2 attributes are highly correlated and follow the same trend

if corr ~ -1, then 2 attributes are highly correlated and follow the inverse/opposite trend

if corr ~ 0 then 2 attributes are nearly independent of each other

In [8]:
def heatmap(df,figsize=(25,25),annot_size = 8,cmap=sns.cubehelix_palette(start = 0.2,rot = 0.3,dark = 0.15,light = 0.85,as_cmap = True)):
    corr = df.corr()
    _,ax = plt.subplots(1,1,figsize=figsize)
    sns.heatmap(corr,
               cbar=True,
               cbar_kws={'shrink':0.9},
               annot=True,
               annot_kws={'fontsize':annot_size},
               cmap = cmap
               )
    plt.show()  
   
heatmap(df)

Top 10 and Bottom 10 teams based on average player rating

In [9]:
teamdf = df.copy(deep=True)
In [10]:
teamdf = teamdf.drop(["Nationality", "National_Position", "Club_Joining", "Contract_Expiry"], axis=1)
teamdf.head(5)
Out[10]:
Name National_Kit Club Club_Position Club_Kit Rating Height Weight Preffered_Foot Birth_Date ... Long_Shots Curve Freekick_Accuracy Penalties Volleys GK_Positioning GK_Diving GK_Kicking GK_Handling GK_Reflexes
0 Cristiano Ronaldo 7.0 Real Madrid LW 7.0 94 185 cm 80 kg Right 02-05-1985 ... 90 81 76 85 88 14 7 15 11 11
1 Lionel Messi 10.0 FC Barcelona RW 10.0 93 170 cm 72 kg Left 06/24/1987 ... 88 89 90 74 85 14 6 15 11 8
2 Neymar 10.0 FC Barcelona LW 11.0 92 174 cm 68 kg Right 02-05-1992 ... 77 79 84 81 83 15 9 15 9 11
3 Luis Suárez 9.0 FC Barcelona ST 9.0 92 182 cm 85 kg Right 01/24/1987 ... 86 86 84 85 88 33 27 31 25 37
4 Manuel Neuer 1.0 FC Bayern GK 1.0 92 193 cm 92 kg Right 03/27/1986 ... 16 14 11 47 11 91 89 95 90 89

5 rows × 49 columns

In [11]:
teams = teamdf.groupby(['Club'])['Rating'].mean().sort_values(ascending=False)
In [12]:
teams.head(10)
Out[12]:
Club
Free agent         81.000000
Juventus           79.785714
FC Bayern          79.115385
Real Madrid        78.757576
FC Barcelona       78.090909
Napoli             77.769231
Roma               77.760000
Sevilla FC         76.964286
PSG                76.709677
Atlético Madrid    76.548387
Name: Rating, dtype: float64
In [13]:
teams.tail(10)
Out[13]:
Club
Shamrock Rovers    56.640000
Crewe Alexandra    56.037037
Bray Wanderers     56.000000
Sligo Rovers       55.739130
Galway United      54.904762
Bohemian FC        54.791667
Finn Harps         54.640000
Derry City         54.541667
Longford Town      52.200000
Wexford Youths     51.863636
Name: Rating, dtype: float64
In [14]:
teamdf[teamdf['Club_Position'].isnull()]
Out[14]:
Name National_Kit Club Club_Position Club_Kit Rating Height Weight Preffered_Foot Birth_Date ... Long_Shots Curve Freekick_Accuracy Penalties Volleys GK_Positioning GK_Diving GK_Kicking GK_Handling GK_Reflexes
383 Didier Drogba NaN Free agent NaN NaN 81 189 cm 80 kg Right 03-11-1978 ... 79 78 84 84 76 6 10 8 11 14

1 rows × 49 columns

In [15]:
teams_wo_freeagent = teamdf.drop(teamdf.index[383])
teams_wo_freeagent.shape
Out[15]:
(17587, 49)
In [16]:
# Removed free agent from the set
teams = teams_wo_freeagent.groupby(['Club'])['Rating'].mean().sort_values(ascending=False)
print("Top 10 Teams")
top10_teams = teams.head(10)
top10_teams
Top 10 Teams
Out[16]:
Club
Juventus           79.785714
FC Bayern          79.115385
Real Madrid        78.757576
FC Barcelona       78.090909
Napoli             77.769231
Roma               77.760000
Sevilla FC         76.964286
PSG                76.709677
Atlético Madrid    76.548387
Arsenal            76.333333
Name: Rating, dtype: float64
In [17]:
print("Bottom 10 Teams")
bottom10_teams = teams.tail(10)
bottom10_teams
Bottom 10 Teams
Out[17]:
Club
Shamrock Rovers    56.640000
Crewe Alexandra    56.037037
Bray Wanderers     56.000000
Sligo Rovers       55.739130
Galway United      54.904762
Bohemian FC        54.791667
Finn Harps         54.640000
Derry City         54.541667
Longford Town      52.200000
Wexford Youths     51.863636
Name: Rating, dtype: float64

Best players at each unique Club Position

In [18]:
player_df = pd.read_csv("data/FullData.csv")
player_df.head(5)
Out[18]:
Name Nationality National_Position National_Kit Club Club_Position Club_Kit Club_Joining Contract_Expiry Rating ... Long_Shots Curve Freekick_Accuracy Penalties Volleys GK_Positioning GK_Diving GK_Kicking GK_Handling GK_Reflexes
0 Cristiano Ronaldo Portugal LS 7.0 Real Madrid LW 7.0 07-01-2009 2021.0 94 ... 90 81 76 85 88 14 7 15 11 11
1 Lionel Messi Argentina RW 10.0 FC Barcelona RW 10.0 07-01-2004 2018.0 93 ... 88 89 90 74 85 14 6 15 11 8
2 Neymar Brazil LW 10.0 FC Barcelona LW 11.0 07-01-2013 2021.0 92 ... 77 79 84 81 83 15 9 15 9 11
3 Luis Suárez Uruguay LS 9.0 FC Barcelona ST 9.0 07-11-2014 2021.0 92 ... 86 86 84 85 88 33 27 31 25 37
4 Manuel Neuer Germany GK 1.0 FC Bayern GK 1.0 07-01-2011 2021.0 92 ... 16 14 11 47 11 91 89 95 90 89

5 rows × 53 columns

In [19]:
player_df = player_df[["Name" ,"Nationality", "Rating", "Club_Position", "Ball_Control", "Dribbling", "Marking","Sliding_Tackle", "Standing_Tackle", "Aggression", "Reactions", "Attacking_Position", "Crossing", "Acceleration", "Balance", "GK_Positioning","GK_Diving","GK_Kicking","GK_Reflexes","Penalties","Volleys"]]
In [20]:
#Unique positions from all the club
unique_position = player_df["Club_Position"].unique()
unique_position
Out[20]:
array(['LW', 'RW', 'ST', 'GK', 'Sub', 'RCM', 'CAM', 'LCB', 'LCM', 'RS',
       'RB', 'RCB', 'LM', 'LDM', 'RM', 'LB', 'CDM', 'RDM', 'LF', 'CB',
       'LAM', 'Res', 'CM', 'LS', 'RF', 'RWB', 'RAM', 'LWB', nan, 'CF'], dtype=object)
In [21]:
unique_position.shape
Out[21]:
(30,)

Grouping positions into categories

In [22]:
# Grouping positions
attackers = ['LW', 'RW', 'ST', 'RS', 'LF', 'CF', 'RF', 'LS']
midfielders = ['RCM', 'CAM', 'LCM', 'LM', 'LDM', 'RM', 'CDM', 'RDM', 'LAM', 'CM', 'RAM']
defenders = ['LCB', 'RB', 'RCB', 'LB', 'CDM', 'RDM', 'CB', 'RWB', 'LWB']
goalkeepers = ['GK']
others = ['Sub', 'Res'] # Remove these rows
In [23]:
player_df = player_df[(player_df.Club_Position != 'Sub') & (player_df.Club_Position != 'Res')]
In [24]:
player_df.head(5)
Out[24]:
Name Nationality Rating Club_Position Ball_Control Dribbling Marking Sliding_Tackle Standing_Tackle Aggression ... Attacking_Position Crossing Acceleration Balance GK_Positioning GK_Diving GK_Kicking GK_Reflexes Penalties Volleys
0 Cristiano Ronaldo Portugal 94 LW 93 92 22 23 31 63 ... 94 84 91 63 14 7 15 11 85 88
1 Lionel Messi Argentina 93 RW 95 97 13 26 28 48 ... 93 77 92 95 14 6 15 8 74 85
2 Neymar Brazil 92 LW 95 96 21 33 24 56 ... 90 75 93 82 15 9 15 11 81 83
3 Luis Suárez Uruguay 92 ST 91 86 30 38 45 78 ... 92 77 88 60 33 27 31 37 85 88
4 Manuel Neuer Germany 92 GK 48 30 10 11 10 29 ... 12 15 58 35 91 89 95 89 47 11

5 rows × 21 columns

In [25]:
player_df[player_df['Club_Position'].isin(attackers)].head(5)
Out[25]:
Name Nationality Rating Club_Position Ball_Control Dribbling Marking Sliding_Tackle Standing_Tackle Aggression ... Attacking_Position Crossing Acceleration Balance GK_Positioning GK_Diving GK_Kicking GK_Reflexes Penalties Volleys
0 Cristiano Ronaldo Portugal 94 LW 93 92 22 23 31 63 ... 94 84 91 63 14 7 15 11 85 88
1 Lionel Messi Argentina 93 RW 95 97 13 26 28 48 ... 93 77 92 95 14 6 15 8 74 85
2 Neymar Brazil 92 LW 95 96 21 33 24 56 ... 90 75 93 82 15 9 15 11 81 83
3 Luis Suárez Uruguay 92 ST 91 86 30 38 45 78 ... 92 77 88 60 33 27 31 37 85 88
6 Robert Lewandowski Poland 90 ST 87 85 25 19 42 80 ... 89 62 79 79 8 15 12 10 81 86

5 rows × 21 columns

In [26]:
player_df[player_df['Club_Position'].isin(midfielders)].head(5)
Out[26]:
Name Nationality Rating Club_Position Ball_Control Dribbling Marking Sliding_Tackle Standing_Tackle Aggression ... Attacking_Position Crossing Acceleration Balance GK_Positioning GK_Diving GK_Kicking GK_Reflexes Penalties Volleys
12 Luka Modrić Croatia 89 RCM 92 86 66 73 80 62 ... 79 78 77 94 14 13 7 9 80 74
13 Mesut Özil Germany 89 CAM 90 86 22 19 16 48 ... 84 82 77 70 6 6 10 14 67 77
18 Paul Pogba France 88 LCM 90 89 68 76 75 72 ... 84 78 75 61 4 5 2 3 76 84
20 Kevin De Bruyne Belgium 88 RCM 86 85 30 40 39 68 ... 84 90 76 75 10 15 5 13 77 82
23 Toni Kroos Germany 88 LCM 85 78 63 65 75 60 ... 76 85 55 62 7 10 13 10 73 82

5 rows × 21 columns

In [27]:
player_df[player_df['Club_Position'].isin(defenders)].head(5)
Out[27]:
Name Nationality Rating Club_Position Ball_Control Dribbling Marking Sliding_Tackle Standing_Tackle Aggression ... Attacking_Position Crossing Acceleration Balance GK_Positioning GK_Diving GK_Kicking GK_Reflexes Penalties Volleys
15 Thiago Silva Brazil 89 LCB 80 68 90 89 91 77 ... 59 60 72 68 9 9 5 10 71 63
16 Sergio Ramos Spain 89 LCB 83 61 85 90 89 84 ... 52 66 77 60 7 11 9 11 68 66
24 Diego Godín Uruguay 88 LCB 76 53 87 89 86 86 ... 48 55 62 58 5 6 15 15 50 47
25 Mats Hummels Germany 88 LCB 77 68 85 87 92 66 ... 56 64 62 58 5 15 10 6 68 60
27 Giorgio Chiellini Italy 88 LCB 55 56 92 90 92 90 ... 28 58 69 65 4 3 2 3 50 45

5 rows × 21 columns

In [28]:
player_df[player_df['Club_Position'].isin(goalkeepers)].head(5)
Out[28]:
Name Nationality Rating Club_Position Ball_Control Dribbling Marking Sliding_Tackle Standing_Tackle Aggression ... Attacking_Position Crossing Acceleration Balance GK_Positioning GK_Diving GK_Kicking GK_Reflexes Penalties Volleys
4 Manuel Neuer Germany 92 GK 48 30 10 11 10 29 ... 12 15 58 35 91 89 95 89 47 11
5 De Gea Spain 90 GK 31 13 13 13 21 38 ... 12 17 56 43 86 88 87 90 40 13
9 Thibaut Courtois Belgium 89 GK 23 13 11 16 18 23 ... 13 14 46 45 86 84 69 89 27 12
26 Hugo Lloris France 88 GK 34 10 12 18 10 31 ... 10 13 65 54 82 87 68 90 40 11
30 Petr Čech Czech Republic 88 GK 22 12 11 12 13 17 ... 13 19 42 34 85 83 77 85 23 17

5 rows × 21 columns

In [29]:
player_df.shape
Out[29]:
(6950, 21)

Best Players at Each Club Positions

In [30]:
from pandasql import sqldf
pysqldf = lambda q: sqldf(q, globals())
query = '''select Name, Nationality, max(Rating) as Rating, Club_Position from player_df group by Club_Position'''
res = pysqldf(query)
res
Out[30]:
Name Nationality Rating Club_Position
0 Didier Drogba Ivory Coast 81 None
1 Mesut Özil Germany 89 CAM
2 Toby Alderweireld Belgium 85 CB
3 Sergio Busquets Spain 86 CDM
4 Teddy Chevalier France 70 CF
5 Adrien Silva Portugal 83 CM
6 Manuel Neuer Germany 92 GK
7 Ivan Perišić Croatia 84 LAM
8 David Alaba Austria 86 LB
9 Thiago Silva Brazil 89 LCB
10 Paul Pogba France 88 LCM
11 Arturo Vidal Chile 87 LDM
12 Radja Nainggolan Belgium 86 LF
13 Iniesta Spain 88 LM
14 Sebastian Giovinco Italy 83 LS
15 Cristiano Ronaldo Portugal 94 LW
16 Danny Rose England 81 LWB
17 Antonio Candreva Italy 81 RAM
18 Philipp Lahm Germany 88 RB
19 Pepe Portugal 88 RCB
20 Luka Modrić Croatia 89 RCM
21 Thiago Spain 86 RDM
22 Borja Valero Spain 83 RF
23 Arjen Robben Netherlands 87 RM
24 Antoine Griezmann France 88 RS
25 Lionel Messi Argentina 93 RW
26 Kyle Walker England 82 RWB
27 Luis Suárez Uruguay 92 ST

Which Attributes a player has to develop

In [31]:
'''
Considering players skill below 95 percentile has to be improved, marking player Attributes development as “Yes”
'''
player_attdf = pd.read_csv("data/FullData.csv")
In [32]:
player_attdf = player_attdf[['Name', 'Nationality', 'Rating', 'Preffered_Position', 'Skill_Moves',
       'Ball_Control', 'Dribbling', 'Marking', 'Sliding_Tackle',
       'Standing_Tackle', 'Aggression', 'Reactions', 'Attacking_Position',
       'Interceptions', 'Vision', 'Composure', 'Crossing', 'Short_Pass',
       'Long_Pass', 'Acceleration', 'Speed', 'Stamina', 'Strength', 'Balance',
       'Agility', 'Jumping', 'Heading', 'Shot_Power', 'Finishing',
       'Long_Shots', 'Curve', 'Freekick_Accuracy', 'Penalties', 'Volleys',
       'GK_Positioning', 'GK_Diving', 'GK_Kicking', 'GK_Handling',
       'GK_Reflexes']]
In [33]:
player_attdf.Preffered_Position.replace("/*", "", regex=True, inplace=True)
In [34]:
player_attdf.Preffered_Position.unique()
Out[34]:
array(['LWST', 'RW', 'LW', 'ST', 'GK', 'CB', 'LWLM', 'CMCDM', 'CAMLW',
       'CMCAM', 'STLW', 'CAMRMLM', 'LMCAM', 'STRM', 'RBCM', 'CM', 'CAMRM',
       'CAMLM', 'RMRW', 'STCAM', 'LBCM', 'RMCAM', 'CFCAMST', 'CDMCM',
       'CMCDMLM', 'LB', 'LWCAM', 'RWCAM', 'LMLW', 'CAMCDM', 'CAMCM',
       'CBCDM', 'RB', 'CMRM', 'LBLM', 'LM', 'CBRB', 'CAM', 'CMLW', 'CDM',
       'RM', 'CBLB', 'LMRM', 'STRW', 'CMRW', 'RWLW', 'CMLM', 'CFST',
       'STLM', 'RBCB', 'LMCMCAM', 'CAMCF', 'CAMST', 'LWCAMLM', 'RBRM',
       'RMLM', 'CAMLMLW', 'LMST', 'RWRM', 'CAMSTCF', 'CDMCB', 'LBCB',
       'RBRWB', 'RWST', 'LWCF', 'CDMRM', 'RMST', 'LBCDM', 'LMRW', 'RMLW',
       'RWCMLW', 'RBRW', 'LBRB', 'LBLWB', 'CAMRW', 'LWRW', 'CMRBCDM',
       'CDMRB', 'CMCAMLM', 'CFRM', 'RBLB', 'CFCAM', 'LMCM', 'CFLW',
       'RMCAMST', 'STRMRW', 'CAMCMLM', 'RBRWRWB', 'RMRB', 'CMCB',
       'CAMLMCM', 'LWBLB', 'CMRBRM', 'LMRB', 'STCF', 'LWCAMCF', 'CMCF',
       'CAMLMCDM', 'LMLWLB', 'LWCM', 'CDMCAM', 'RWBLM', 'CMCAMRM',
       'CMCDMCAM', 'RMRWB', 'CAMSTCM', 'RWBRB', 'LMCDM', 'RBCDM',
       'CDMCMRB', 'CDMCBRB', 'CFLM', 'RWLWRM', 'LMLB', 'RMCF', 'CMRMCDM',
       'LMLWB', 'RMCM', 'LMRMRW', 'CAMCMCDM', 'RMCFLW', 'CAMLMRM',
       'CMCFRW', 'LBCAM', 'RWLWCF', 'RMLMCAM', 'LWRM', 'RMCMCAM', 'LBLW',
       'CDMCMCAM', 'RWRB', 'RMCDMLM', 'CFCAMRM', 'RWBRM', 'CBRBCM',
       'LBLMLW', 'CDMLB', 'CFRW', 'CAMRB', 'RBRWLW', 'LMRMCF', 'RWCM',
       'CBCM', 'RWLM', 'STLMRB', 'CMRB', 'LMRMCM', 'LBCDMCM', 'CBLBCDM',
       'CAMRWLW', 'CMRMLM', 'CAMCFCDM', 'LWLMRM', 'RBCBRWB', 'CMLB',
       'CBLM', 'RWLMLW', 'LBRM', 'LMCF', 'RMLB', 'CBRWB', 'STLWRW',
       'LWBCDM', 'CMLMCB', 'CMLMRM', 'RMCAMLM', 'CMRMCAM', 'STLMCF',
       'RBLM', 'CFRMLM', 'STRWLW', 'CAMLB', 'RMRWRWB', 'CAMRMCM', 'LMRMST',
       'STCM', 'LWBCB', 'STRWB', 'CDMLM', 'STRMCAM', 'CAMCMRM', 'CMLMCDM',
       'CFCM', 'CAMCDMLM', 'LBLWBCM', 'RMCMCDM', 'RMCDM', 'RMLWLM', 'CMST',
       'CAMRMLW', 'CAMCMCF', 'LWLB', 'LWBCMRWB', 'RWCFRM', 'RWCF',
       'RBRMLM', 'LBRW', 'LWBLW', 'CAMRWB', 'RMRWLW', 'CMRMLB', 'LWRWCAM',
       'RMLMRB', 'LMCAMST', 'RMCMLWB', 'CBLMLWB', 'LMSTCB', 'LWBRWB',
       'CBLWB', 'RBRMCDM', 'CF', 'RWCAMST', 'LWBRM', 'CDMRMRB', 'RMCAMCM',
       'STRWRM', 'CAMRMST', 'CAMCMST', 'LMCMRM', 'RMCAMCDM', 'RWBLB',
       'STRWLM', 'RBCAM', 'STCAMLM', 'CDMRBRM', 'CMLWB', 'STCFCAM', 'CBST',
       'STLMCAM', 'RBCAMCM', 'RWRMLW', 'CMRWLW', 'LMCAMRB', 'STRMLW',
       'RMLWRW', 'CDMCAMRM', 'LMSTCAM', 'RWB', 'CDMRMCAM', 'RWBLWB',
       'LMRMLB', 'CMRWB', 'RBLMCAM', 'RBCDMCB', 'LWBLM', 'LMCAMCM',
       'CAMRWST', 'LWSTCF', 'CDMLMCAM', 'CAMCFST', 'RWLWCM', 'RBRMLB',
       'RMLMCDM', 'STLWLM', 'RWBCM', 'CMLMCAM', 'LBLWBRWB', 'RBRMCAM',
       'LBLMRM', 'CDMRBCB', 'LWCAMST', 'LWB', 'CDMRWB', 'LMCMCB',
       'CBCDMCM', 'RBST', 'RWSTCM', 'RWLWST', 'CMSTCDM', 'STCB', 'LWLMLB',
       'RWLB', 'CMCDMRM', 'STCFRM', 'CAMLMST', 'RMLMCM', 'RWRMST',
       'CAMLWB', 'CDMLMRM', 'CMCDMRB', 'LWLWB', 'RMCB', 'RBLWB', 'RBLBCDM',
       'LBST', 'RWSTLM', 'STLMRM', 'CBCMRM', 'STCDM', 'CBRM', 'STLB',
       'RBLBCM'], dtype=object)
In [35]:
player_attdf.Preffered_Position.unique().shape
Out[35]:
(292,)
In [36]:
'''
Changing all positions into - Forward , Midfielder, Defender , Goalkeeper
In the Preffered Position column
For Forward player - preffered position ends with W
For Defender player - preffered position ends with W
For Midfielder player - preffered position ends with W
For Goalkeeper - preffered position is GK
preffered position - STCF,ST,CF,CFST are Forward player
'''
player_attdf.Preffered_Position.replace("/*.^", "", regex=True, inplace=True)
player_attdf.Preffered_Position.replace("^.*W", "Forward", regex=True, inplace=True)
player_attdf.Preffered_Position.replace("Forward*.^", "Forward", regex=True, inplace=True)
player_attdf.Preffered_Position.replace("GK", "Goalkeeper", regex=True, inplace=True)
player_attdf.Preffered_Position.replace("^.*B", "Defender", regex=True, inplace=True)
player_attdf.Preffered_Position.replace("Defender*.^", "Defender", regex=True, inplace=True)
player_attdf.Preffered_Position.replace("^.*M", "Midfielder", regex=True, inplace=True)
player_attdf.Preffered_Position.replace("Midfielder*.^", "Midfielder", regex=True, inplace=True)
player_attdf.Preffered_Position.replace("STCF", "Forward", regex=True, inplace=True)
player_attdf.Preffered_Position.replace("ST", "Forward", regex=True, inplace=True)
player_attdf.Preffered_Position.replace("CF", "Forward", regex=True, inplace=True)
player_attdf.Preffered_Position.replace("CFST", "Forward", regex=True, inplace=True)
In [37]:
#getting unique positions after above all processing
player_attdf.Preffered_Position.unique()
Out[37]:
array(['ForwardForward', 'Forward', 'Goalkeeper', 'Defender', 'Midfielder',
       'MidfielderForward', 'DefenderForward', 'MidfielderForwardForward'], dtype=object)
In [38]:
player_attdf.head(5)
Out[38]:
Name Nationality Rating Preffered_Position Skill_Moves Ball_Control Dribbling Marking Sliding_Tackle Standing_Tackle ... Long_Shots Curve Freekick_Accuracy Penalties Volleys GK_Positioning GK_Diving GK_Kicking GK_Handling GK_Reflexes
0 Cristiano Ronaldo Portugal 94 ForwardForward 5 93 92 22 23 31 ... 90 81 76 85 88 14 7 15 11 11
1 Lionel Messi Argentina 93 Forward 4 95 97 13 26 28 ... 88 89 90 74 85 14 6 15 11 8
2 Neymar Brazil 92 Forward 5 95 96 21 33 24 ... 77 79 84 81 83 15 9 15 9 11
3 Luis Suárez Uruguay 92 Forward 4 91 86 30 38 45 ... 86 86 84 85 88 33 27 31 25 37
4 Manuel Neuer Germany 92 Goalkeeper 1 48 30 10 11 10 ... 16 14 11 47 11 91 89 95 90 89

5 rows × 39 columns

In [39]:
player_attdf.shape
Out[39]:
(17588, 39)

Best Players at positions - Attacker, Midfielder, Defender and Goalkeeper

In [40]:
from pandasql import sqldf
pysqldf = lambda q: sqldf(q, globals())
query = '''select Name, Nationality, max(Rating) as Rating, Preffered_Position from player_attdf group by Preffered_Position'''
res = pysqldf(query)
res
Out[40]:
Name Nationality Rating Preffered_Position
0 Jérôme Boateng Germany 89 Defender
1 Túlio Japan 68 DefenderForward
2 Lionel Messi Argentina 93 Forward
3 Cristiano Ronaldo Portugal 94 ForwardForward
4 Manuel Neuer Germany 92 Goalkeeper
5 Eden Hazard Belgium 89 Midfielder
6 Thomas Müller Germany 86 MidfielderForward
7 Lukas Thürauer Austria 65 MidfielderForwardForward
In [41]:
#Creating seperate df for Each category
forward_df = player_attdf.loc[player_attdf['Preffered_Position'] == 'Forward']
midfielder_df = player_attdf.loc[player_attdf['Preffered_Position'] == 'Midfielder']
defender_df = player_attdf.loc[player_attdf['Preffered_Position'] == 'Defender']
goalkeeper_df = player_attdf.loc[player_attdf['Preffered_Position'] == 'Goalkeeper']
In [42]:
forward_df.head(5)
Out[42]:
Name Nationality Rating Preffered_Position Skill_Moves Ball_Control Dribbling Marking Sliding_Tackle Standing_Tackle ... Long_Shots Curve Freekick_Accuracy Penalties Volleys GK_Positioning GK_Diving GK_Kicking GK_Handling GK_Reflexes
1 Lionel Messi Argentina 93 Forward 4 95 97 13 26 28 ... 88 89 90 74 85 14 6 15 11 8
2 Neymar Brazil 92 Forward 5 95 96 21 33 24 ... 77 79 84 81 83 15 9 15 9 11
3 Luis Suárez Uruguay 92 Forward 4 91 86 30 38 45 ... 86 86 84 85 88 33 27 31 25 37
6 Robert Lewandowski Poland 90 Forward 3 87 85 25 19 42 ... 82 77 76 81 86 8 15 12 6 10
7 Gareth Bale Wales 90 Forward 4 88 89 51 52 55 ... 90 86 85 76 76 5 15 11 15 6

5 rows × 39 columns

In [43]:
midfielder_df.head(5)
Out[43]:
Name Nationality Rating Preffered_Position Skill_Moves Ball_Control Dribbling Marking Sliding_Tackle Standing_Tackle ... Long_Shots Curve Freekick_Accuracy Penalties Volleys GK_Positioning GK_Diving GK_Kicking GK_Handling GK_Reflexes
11 Eden Hazard Belgium 89 Midfielder 4 91 93 25 22 27 ... 82 82 79 86 79 8 11 6 12 8
12 Luka Modrić Croatia 89 Midfielder 4 92 86 66 73 80 ... 82 79 77 80 74 14 13 7 9 9
18 Paul Pogba France 88 Midfielder 5 90 89 68 76 75 ... 88 82 82 76 84 4 5 2 6 3
20 Kevin De Bruyne Belgium 88 Midfielder 4 86 85 30 40 39 ... 86 81 84 77 82 10 15 5 13 13
21 Marco Reus Germany 88 Midfielder 4 85 86 30 46 36 ... 85 90 84 84 88 13 12 13 12 11

5 rows × 39 columns

In [44]:
defender_df.head(5)
Out[44]:
Name Nationality Rating Preffered_Position Skill_Moves Ball_Control Dribbling Marking Sliding_Tackle Standing_Tackle ... Long_Shots Curve Freekick_Accuracy Penalties Volleys GK_Positioning GK_Diving GK_Kicking GK_Handling GK_Reflexes
10 Jérôme Boateng Germany 89 Defender 2 72 67 90 91 92 ... 58 56 31 46 53 6 7 15 12 5
15 Thiago Silva Brazil 89 Defender 3 80 68 90 89 91 ... 71 61 73 71 63 9 9 5 12 10
16 Sergio Ramos Spain 89 Defender 3 83 61 85 90 89 ... 55 73 67 68 66 7 11 9 8 11
24 Diego Godín Uruguay 88 Defender 2 76 53 87 89 86 ... 43 49 51 50 47 5 6 15 8 15
25 Mats Hummels Germany 88 Defender 2 77 68 85 87 92 ... 51 65 53 68 60 5 15 10 6 6

5 rows × 39 columns

In [45]:
goalkeeper_df.head(5)
Out[45]:
Name Nationality Rating Preffered_Position Skill_Moves Ball_Control Dribbling Marking Sliding_Tackle Standing_Tackle ... Long_Shots Curve Freekick_Accuracy Penalties Volleys GK_Positioning GK_Diving GK_Kicking GK_Handling GK_Reflexes
4 Manuel Neuer Germany 92 Goalkeeper 1 48 30 10 11 10 ... 16 14 11 47 11 91 89 95 90 89
5 De Gea Spain 90 Goalkeeper 1 31 13 13 13 21 ... 12 21 19 40 13 86 88 87 85 90
9 Thibaut Courtois Belgium 89 Goalkeeper 1 23 13 11 16 18 ... 17 19 11 27 12 86 84 69 91 89
26 Hugo Lloris France 88 Goalkeeper 1 34 10 12 18 10 ... 14 11 10 40 11 82 87 68 87 90
30 Petr Čech Czech Republic 88 Goalkeeper 1 22 12 11 12 13 ... 11 13 19 23 17 85 83 77 90 85

5 rows × 39 columns

Attributes which a player has to develop

For Forwards

In [46]:
forward_df.Skill_Moves = np.where(forward_df.Skill_Moves >= forward_df.Skill_Moves.quantile(q=.95), 'No', 'Yes')
forward_df.Ball_Control = np.where(forward_df.Ball_Control >= forward_df.Ball_Control.quantile(q=.95), 'No', 'Yes')
forward_df.Dribbling = np.where(forward_df.Dribbling >= forward_df.Dribbling.quantile(q=.95), 'No', 'Yes')
forward_df.Marking = np.where(forward_df.Marking >= forward_df.Marking.quantile(q=.95), 'No', 'Yes')
forward_df.Sliding_Tackle = np.where(forward_df.Sliding_Tackle >= forward_df.Sliding_Tackle.quantile(q=.95), 'No', 'Yes')
forward_df.Standing_Tackle = np.where(forward_df.Standing_Tackle >= forward_df.Standing_Tackle.quantile(q=.95), 'No', 'Yes')
forward_df.Aggression = np.where(forward_df.Aggression >= forward_df.Aggression.quantile(q=.95), 'No', 'Yes')
forward_df.Reactions = np.where(forward_df.Reactions >= forward_df.Reactions.quantile(q=.95), 'No', 'Yes')
forward_df.Attacking_Position = np.where(forward_df.Attacking_Position >= forward_df.Attacking_Position.quantile(q=.95), 'No', 'Yes')
forward_df.Interceptions = np.where(forward_df.Interceptions >= forward_df.Interceptions.quantile(q=.95), 'No', 'Yes')
forward_df.Vision = np.where(forward_df.Vision >= forward_df.Vision.quantile(q=.95), 'No', 'Yes')
forward_df.Composure = np.where(forward_df.Composure >= forward_df.Composure.quantile(q=.95), 'No', 'Yes')
forward_df.Crossing = np.where(forward_df.Crossing >= forward_df.Crossing.quantile(q=.95), 'No', 'Yes')
forward_df.Short_Pass = np.where(forward_df.Short_Pass >= forward_df.Short_Pass.quantile(q=.95), 'No', 'Yes')
forward_df.Long_Pass = np.where(forward_df.Long_Pass >= forward_df.Long_Pass.quantile(q=.95), 'No', 'Yes')
forward_df.Acceleration = np.where(forward_df.Acceleration >= forward_df.Acceleration.quantile(q=.95), 'No', 'Yes')
forward_df.Speed = np.where(forward_df.Speed >= forward_df.Speed.quantile(q=.95), 'No', 'Yes')
forward_df.Stamina = np.where(forward_df.Stamina >= forward_df.Stamina.quantile(q=.95), 'No', 'Yes')
forward_df.Strength = np.where(forward_df.Strength >= forward_df.Strength.quantile(q=.95), 'No', 'Yes')
forward_df.Balance = np.where(forward_df.Balance >= forward_df.Balance.quantile(q=.95), 'No', 'Yes')
forward_df.Agility = np.where(forward_df.Agility >= forward_df.Agility.quantile(q=.95), 'No', 'Yes')
forward_df.Jumping = np.where(forward_df.Jumping >= forward_df.Jumping.quantile(q=.95), 'No', 'Yes')
forward_df.Heading = np.where(forward_df.Heading >= forward_df.Heading.quantile(q=.95), 'No', 'Yes')
forward_df.Shot_Power = np.where(forward_df.Shot_Power >= forward_df.Shot_Power.quantile(q=.95), 'No', 'Yes')
forward_df.Finishing = np.where(forward_df.Finishing >= forward_df.Finishing.quantile(q=.95), 'No', 'Yes')
forward_df.Long_Shots = np.where(forward_df.Long_Shots >= forward_df.Long_Shots.quantile(q=.95), 'No', 'Yes')
forward_df.Curve = np.where(forward_df.Curve >= forward_df.Curve.quantile(q=.95), 'No', 'Yes')
forward_df.Freekick_Accuracy = np.where(forward_df.Freekick_Accuracy >= forward_df.Freekick_Accuracy.quantile(q=.95), 'No', 'Yes')
forward_df.Penalties = np.where(forward_df.Penalties >= forward_df.Penalties.quantile(q=.95), 'No', 'Yes')
forward_df.Volleys = np.where(forward_df.Volleys >= forward_df.Volleys.quantile(q=.95), 'No', 'Yes')
forward_df.GK_Positioning = np.where(forward_df.GK_Positioning >= forward_df.GK_Positioning.quantile(q=.95), 'No', 'Yes')
forward_df.GK_Diving = np.where(forward_df.GK_Diving >= forward_df.GK_Diving.quantile(q=.95), 'No', 'Yes')
forward_df.GK_Kicking = np.where(forward_df.GK_Kicking >= forward_df.GK_Kicking.quantile(q=.95), 'No', 'Yes')
forward_df.GK_Handling = np.where(forward_df.GK_Handling >= forward_df.GK_Handling.quantile(q=.95), 'No', 'Yes')
forward_df.GK_Reflexes = np.where(forward_df.GK_Reflexes >= forward_df.GK_Reflexes.quantile(q=.95), 'No', 'Yes')
In [47]:
from IPython.display import display
pd.options.display.max_columns = None
display(forward_df.head(5))
Name Nationality Rating Preffered_Position Skill_Moves Ball_Control Dribbling Marking Sliding_Tackle Standing_Tackle Aggression Reactions Attacking_Position Interceptions Vision Composure Crossing Short_Pass Long_Pass Acceleration Speed Stamina Strength Balance Agility Jumping Heading Shot_Power Finishing Long_Shots Curve Freekick_Accuracy Penalties Volleys GK_Positioning GK_Diving GK_Kicking GK_Handling GK_Reflexes
1 Lionel Messi Argentina 93 Forward No No No Yes Yes Yes Yes No No Yes No No No No No No No Yes Yes No No Yes Yes No No No No No Yes No Yes Yes No Yes Yes
2 Neymar Brazil 92 Forward No No No Yes Yes Yes Yes No No Yes No No No No No No No Yes Yes Yes No Yes Yes Yes No No No No No No No Yes No Yes Yes
3 Luis Suárez Uruguay 92 Forward No No No Yes Yes Yes No No No Yes No No No No Yes No Yes No Yes Yes No Yes Yes No No No No No No No No No No No No
6 Robert Lewandowski Poland 90 Forward Yes No No Yes Yes Yes No No No Yes No No Yes No Yes Yes Yes Yes Yes Yes Yes No No No No No No No No No Yes No Yes Yes Yes
7 Gareth Bale Wales 90 Forward No No No No No No Yes No No No No No No No No No No Yes Yes Yes Yes No No No No No No No Yes No Yes No Yes Yes Yes

For Midfielders

In [48]:
midfielder_df.Skill_Moves = np.where(midfielder_df.Skill_Moves >= midfielder_df.Skill_Moves.quantile(q=.95), 'No', 'Yes')
midfielder_df.Ball_Control = np.where(midfielder_df.Ball_Control >= midfielder_df.Ball_Control.quantile(q=.95), 'No', 'Yes')
midfielder_df.Dribbling = np.where(midfielder_df.Dribbling >= midfielder_df.Dribbling.quantile(q=.95), 'No', 'Yes')
midfielder_df.Marking = np.where(midfielder_df.Marking >= midfielder_df.Marking.quantile(q=.95), 'No', 'Yes')
midfielder_df.Sliding_Tackle = np.where(midfielder_df.Sliding_Tackle >= midfielder_df.Sliding_Tackle.quantile(q=.95), 'No', 'Yes')
midfielder_df.Standing_Tackle = np.where(midfielder_df.Standing_Tackle >= midfielder_df.Standing_Tackle.quantile(q=.95), 'No', 'Yes')
midfielder_df.Aggression = np.where(midfielder_df.Aggression >= midfielder_df.Aggression.quantile(q=.95), 'No', 'Yes')
midfielder_df.Reactions = np.where(midfielder_df.Reactions >= midfielder_df.Reactions.quantile(q=.95), 'No', 'Yes')
midfielder_df.Attacking_Position = np.where(midfielder_df.Attacking_Position >= midfielder_df.Attacking_Position.quantile(q=.95), 'No', 'Yes')
midfielder_df.Interceptions = np.where(midfielder_df.Interceptions >= midfielder_df.Interceptions.quantile(q=.95), 'No', 'Yes')
midfielder_df.Vision = np.where(midfielder_df.Vision >= midfielder_df.Vision.quantile(q=.95), 'No', 'Yes')
midfielder_df.Composure = np.where(midfielder_df.Composure >= midfielder_df.Composure.quantile(q=.95), 'No', 'Yes')
midfielder_df.Crossing = np.where(midfielder_df.Crossing >= midfielder_df.Crossing.quantile(q=.95), 'No', 'Yes')
midfielder_df.Short_Pass = np.where(midfielder_df.Short_Pass >= midfielder_df.Short_Pass.quantile(q=.95), 'No', 'Yes')
midfielder_df.Long_Pass = np.where(midfielder_df.Long_Pass >= midfielder_df.Long_Pass.quantile(q=.95), 'No', 'Yes')
midfielder_df.Acceleration = np.where(midfielder_df.Acceleration >= midfielder_df.Acceleration.quantile(q=.95), 'No', 'Yes')
midfielder_df.Speed = np.where(midfielder_df.Speed >= midfielder_df.Speed.quantile(q=.95), 'No', 'Yes')
midfielder_df.Stamina = np.where(midfielder_df.Stamina >= midfielder_df.Stamina.quantile(q=.95), 'No', 'Yes')
midfielder_df.Strength = np.where(midfielder_df.Strength >= midfielder_df.Strength.quantile(q=.95), 'No', 'Yes')
midfielder_df.Balance = np.where(midfielder_df.Balance >= midfielder_df.Balance.quantile(q=.95), 'No', 'Yes')
midfielder_df.Agility = np.where(midfielder_df.Agility >= midfielder_df.Agility.quantile(q=.95), 'No', 'Yes')
midfielder_df.Jumping = np.where(midfielder_df.Jumping >= midfielder_df.Jumping.quantile(q=.95), 'No', 'Yes')
midfielder_df.Heading = np.where(midfielder_df.Heading >= midfielder_df.Heading.quantile(q=.95), 'No', 'Yes')
midfielder_df.Shot_Power = np.where(midfielder_df.Shot_Power >= midfielder_df.Shot_Power.quantile(q=.95), 'No', 'Yes')
midfielder_df.Finishing = np.where(midfielder_df.Finishing >= midfielder_df.Finishing.quantile(q=.95), 'No', 'Yes')
midfielder_df.Long_Shots = np.where(midfielder_df.Long_Shots >= midfielder_df.Long_Shots.quantile(q=.95), 'No', 'Yes')
midfielder_df.Curve = np.where(midfielder_df.Curve >= midfielder_df.Curve.quantile(q=.95), 'No', 'Yes')
midfielder_df.Freekick_Accuracy = np.where(midfielder_df.Freekick_Accuracy >= midfielder_df.Freekick_Accuracy.quantile(q=.95), 'No', 'Yes')
midfielder_df.Penalties = np.where(midfielder_df.Penalties >= midfielder_df.Penalties.quantile(q=.95), 'No', 'Yes')
midfielder_df.Volleys = np.where(midfielder_df.Volleys >= midfielder_df.Volleys.quantile(q=.95), 'No', 'Yes')
midfielder_df.GK_Positioning = np.where(midfielder_df.GK_Positioning >= midfielder_df.GK_Positioning.quantile(q=.95), 'No', 'Yes')
midfielder_df.GK_Diving = np.where(midfielder_df.GK_Diving >= midfielder_df.GK_Diving.quantile(q=.95), 'No', 'Yes')
midfielder_df.GK_Kicking = np.where(midfielder_df.GK_Kicking >= midfielder_df.GK_Kicking.quantile(q=.95), 'No', 'Yes')
midfielder_df.GK_Handling = np.where(midfielder_df.GK_Handling >= midfielder_df.GK_Handling.quantile(q=.95), 'No', 'Yes')
midfielder_df.GK_Reflexes = np.where(midfielder_df.GK_Reflexes >= midfielder_df.GK_Reflexes.quantile(q=.95), 'No', 'Yes')
In [49]:
display(midfielder_df.head(5))
Name Nationality Rating Preffered_Position Skill_Moves Ball_Control Dribbling Marking Sliding_Tackle Standing_Tackle Aggression Reactions Attacking_Position Interceptions Vision Composure Crossing Short_Pass Long_Pass Acceleration Speed Stamina Strength Balance Agility Jumping Heading Shot_Power Finishing Long_Shots Curve Freekick_Accuracy Penalties Volleys GK_Positioning GK_Diving GK_Kicking GK_Handling GK_Reflexes
11 Eden Hazard Belgium 89 Midfielder No No No Yes Yes Yes Yes No No Yes No No No No No No No Yes Yes No No Yes Yes Yes No No No No No No Yes Yes Yes Yes Yes
12 Luka Modrić Croatia 89 Midfielder No No No Yes No No Yes No No No No Yes No No No Yes Yes Yes Yes No No Yes Yes Yes Yes No No No No No Yes Yes Yes Yes Yes
18 Paul Pogba France 88 Midfielder No No No Yes No No Yes No No Yes No No No No No Yes Yes No No Yes Yes No No No Yes No No No No No Yes Yes Yes Yes Yes
20 Kevin De Bruyne Belgium 88 Midfielder No No No Yes Yes Yes Yes No No Yes No No No No No Yes Yes Yes Yes Yes Yes Yes Yes No No No No No No No Yes No Yes Yes Yes
21 Marco Reus Germany 88 Midfielder No No No Yes Yes Yes Yes No No Yes No No No No No No No Yes Yes Yes No Yes Yes No No No No No No No Yes Yes Yes Yes Yes

For Defenders

In [50]:
defender_df.Skill_Moves = np.where(defender_df.Skill_Moves >= defender_df.Skill_Moves.quantile(q=.95), 'No', 'Yes')
defender_df.Ball_Control = np.where(defender_df.Ball_Control >= defender_df.Ball_Control.quantile(q=.95), 'No', 'Yes')
defender_df.Dribbling = np.where(defender_df.Dribbling >= defender_df.Dribbling.quantile(q=.95), 'No', 'Yes')
defender_df.Marking = np.where(defender_df.Marking >= defender_df.Marking.quantile(q=.95), 'No', 'Yes')
defender_df.Sliding_Tackle = np.where(defender_df.Sliding_Tackle >= defender_df.Sliding_Tackle.quantile(q=.95), 'No', 'Yes')
defender_df.Standing_Tackle = np.where(defender_df.Standing_Tackle >= defender_df.Standing_Tackle.quantile(q=.95), 'No', 'Yes')
defender_df.Aggression = np.where(defender_df.Aggression >= defender_df.Aggression.quantile(q=.95), 'No', 'Yes')
defender_df.Reactions = np.where(defender_df.Reactions >= defender_df.Reactions.quantile(q=.95), 'No', 'Yes')
defender_df.Attacking_Position = np.where(defender_df.Attacking_Position >= defender_df.Attacking_Position.quantile(q=.95), 'No', 'Yes')
defender_df.Interceptions = np.where(defender_df.Interceptions >= defender_df.Interceptions.quantile(q=.95), 'No', 'Yes')
defender_df.Vision = np.where(defender_df.Vision >= defender_df.Vision.quantile(q=.95), 'No', 'Yes')
defender_df.Composure = np.where(defender_df.Composure >= defender_df.Composure.quantile(q=.95), 'No', 'Yes')
defender_df.Crossing = np.where(defender_df.Crossing >= defender_df.Crossing.quantile(q=.95), 'No', 'Yes')
defender_df.Short_Pass = np.where(defender_df.Short_Pass >= defender_df.Short_Pass.quantile(q=.95), 'No', 'Yes')
defender_df.Long_Pass = np.where(defender_df.Long_Pass >= defender_df.Long_Pass.quantile(q=.95), 'No', 'Yes')
defender_df.Acceleration = np.where(defender_df.Acceleration >= defender_df.Acceleration.quantile(q=.95), 'No', 'Yes')
defender_df.Speed = np.where(defender_df.Speed >= defender_df.Speed.quantile(q=.95), 'No', 'Yes')
defender_df.Stamina = np.where(defender_df.Stamina >= defender_df.Stamina.quantile(q=.95), 'No', 'Yes')
defender_df.Strength = np.where(defender_df.Strength >= defender_df.Strength.quantile(q=.95), 'No', 'Yes')
defender_df.Balance = np.where(defender_df.Balance >= defender_df.Balance.quantile(q=.95), 'No', 'Yes')
defender_df.Agility = np.where(defender_df.Agility >= defender_df.Agility.quantile(q=.95), 'No', 'Yes')
defender_df.Jumping = np.where(defender_df.Jumping >= defender_df.Jumping.quantile(q=.95), 'No', 'Yes')
defender_df.Heading = np.where(defender_df.Heading >= defender_df.Heading.quantile(q=.95), 'No', 'Yes')
defender_df.Shot_Power = np.where(defender_df.Shot_Power >= defender_df.Shot_Power.quantile(q=.95), 'No', 'Yes')
defender_df.Finishing = np.where(defender_df.Finishing >= defender_df.Finishing.quantile(q=.95), 'No', 'Yes')
defender_df.Long_Shots = np.where(defender_df.Long_Shots >= defender_df.Long_Shots.quantile(q=.95), 'No', 'Yes')
defender_df.Curve = np.where(defender_df.Curve >= defender_df.Curve.quantile(q=.95), 'No', 'Yes')
defender_df.Freekick_Accuracy = np.where(defender_df.Freekick_Accuracy >= defender_df.Freekick_Accuracy.quantile(q=.95), 'No', 'Yes')
defender_df.Penalties = np.where(defender_df.Penalties >= defender_df.Penalties.quantile(q=.95), 'No', 'Yes')
defender_df.Volleys = np.where(defender_df.Volleys >= defender_df.Volleys.quantile(q=.95), 'No', 'Yes')
defender_df.GK_Positioning = np.where(defender_df.GK_Positioning >= defender_df.GK_Positioning.quantile(q=.95), 'No', 'Yes')
defender_df.GK_Diving = np.where(defender_df.GK_Diving >= defender_df.GK_Diving.quantile(q=.95), 'No', 'Yes')
defender_df.GK_Kicking = np.where(defender_df.GK_Kicking >= defender_df.GK_Kicking.quantile(q=.95), 'No', 'Yes')
defender_df.GK_Handling = np.where(defender_df.GK_Handling >= defender_df.GK_Handling.quantile(q=.95), 'No', 'Yes')
defender_df.GK_Reflexes = np.where(defender_df.GK_Reflexes >= defender_df.GK_Reflexes.quantile(q=.95), 'No', 'Yes')
In [51]:
display(defender_df.head(5))
Name Nationality Rating Preffered_Position Skill_Moves Ball_Control Dribbling Marking Sliding_Tackle Standing_Tackle Aggression Reactions Attacking_Position Interceptions Vision Composure Crossing Short_Pass Long_Pass Acceleration Speed Stamina Strength Balance Agility Jumping Heading Shot_Power Finishing Long_Shots Curve Freekick_Accuracy Penalties Volleys GK_Positioning GK_Diving GK_Kicking GK_Handling GK_Reflexes
10 Jérôme Boateng Germany 89 Defender Yes Yes Yes No No No No No Yes No No No Yes No No Yes Yes Yes No Yes Yes Yes No No Yes Yes Yes Yes Yes Yes Yes Yes Yes Yes Yes
15 Thiago Silva Brazil 89 Defender No No Yes No No No Yes No Yes No No No Yes No No Yes Yes Yes Yes Yes Yes No No No Yes No Yes No No No Yes Yes Yes Yes Yes
16 Sergio Ramos Spain 89 Defender No No Yes No No No No No Yes No Yes No Yes No Yes Yes Yes No Yes Yes No No No No No Yes No Yes No No Yes Yes Yes Yes Yes
24 Diego Godín Uruguay 88 Defender Yes No Yes No No No No No Yes No Yes No Yes No Yes Yes Yes Yes Yes Yes Yes No No Yes Yes Yes Yes Yes Yes Yes Yes Yes Yes Yes No
25 Mats Hummels Germany 88 Defender Yes No Yes No No No Yes No Yes No No No Yes No No Yes Yes Yes Yes Yes Yes Yes No Yes Yes Yes Yes Yes No No Yes No Yes Yes Yes

For Goalkeepers

In [52]:
goalkeeper_df.Skill_Moves = np.where(goalkeeper_df.Skill_Moves >= goalkeeper_df.Skill_Moves.quantile(q=.95), 'No', 'Yes')
goalkeeper_df.Ball_Control = np.where(goalkeeper_df.Ball_Control >= goalkeeper_df.Ball_Control.quantile(q=.95), 'No', 'Yes')
goalkeeper_df.Dribbling = np.where(goalkeeper_df.Dribbling >= goalkeeper_df.Dribbling.quantile(q=.95), 'No', 'Yes')
goalkeeper_df.Marking = np.where(goalkeeper_df.Marking >= goalkeeper_df.Marking.quantile(q=.95), 'No', 'Yes')
goalkeeper_df.Sliding_Tackle = np.where(goalkeeper_df.Sliding_Tackle >= goalkeeper_df.Sliding_Tackle.quantile(q=.95), 'No', 'Yes')
goalkeeper_df.Standing_Tackle = np.where(goalkeeper_df.Standing_Tackle >= goalkeeper_df.Standing_Tackle.quantile(q=.95), 'No', 'Yes')
goalkeeper_df.Aggression = np.where(goalkeeper_df.Aggression >= goalkeeper_df.Aggression.quantile(q=.95), 'No', 'Yes')
goalkeeper_df.Reactions = np.where(goalkeeper_df.Reactions >= goalkeeper_df.Reactions.quantile(q=.95), 'No', 'Yes')
goalkeeper_df.Attacking_Position = np.where(goalkeeper_df.Attacking_Position >= goalkeeper_df.Attacking_Position.quantile(q=.95), 'No', 'Yes')
goalkeeper_df.Interceptions = np.where(goalkeeper_df.Interceptions >= goalkeeper_df.Interceptions.quantile(q=.95), 'No', 'Yes')
goalkeeper_df.Vision = np.where(goalkeeper_df.Vision >= goalkeeper_df.Vision.quantile(q=.95), 'No', 'Yes')
goalkeeper_df.Composure = np.where(goalkeeper_df.Composure >= goalkeeper_df.Composure.quantile(q=.95), 'No', 'Yes')
goalkeeper_df.Crossing = np.where(goalkeeper_df.Crossing >= goalkeeper_df.Crossing.quantile(q=.95), 'No', 'Yes')
goalkeeper_df.Short_Pass = np.where(goalkeeper_df.Short_Pass >= goalkeeper_df.Short_Pass.quantile(q=.95), 'No', 'Yes')
goalkeeper_df.Long_Pass = np.where(goalkeeper_df.Long_Pass >= goalkeeper_df.Long_Pass.quantile(q=.95), 'No', 'Yes')
goalkeeper_df.Acceleration = np.where(goalkeeper_df.Acceleration >= goalkeeper_df.Acceleration.quantile(q=.95), 'No', 'Yes')
goalkeeper_df.Speed = np.where(goalkeeper_df.Speed >= goalkeeper_df.Speed.quantile(q=.95), 'No', 'Yes')
goalkeeper_df.Stamina = np.where(goalkeeper_df.Stamina >= goalkeeper_df.Stamina.quantile(q=.95), 'No', 'Yes')
goalkeeper_df.Strength = np.where(goalkeeper_df.Strength >= goalkeeper_df.Strength.quantile(q=.95), 'No', 'Yes')
goalkeeper_df.Balance = np.where(goalkeeper_df.Balance >= goalkeeper_df.Balance.quantile(q=.95), 'No', 'Yes')
goalkeeper_df.Agility = np.where(goalkeeper_df.Agility >= goalkeeper_df.Agility.quantile(q=.95), 'No', 'Yes')
goalkeeper_df.Jumping = np.where(goalkeeper_df.Jumping >= goalkeeper_df.Jumping.quantile(q=.95), 'No', 'Yes')
goalkeeper_df.Heading = np.where(goalkeeper_df.Heading >= goalkeeper_df.Heading.quantile(q=.95), 'No', 'Yes')
goalkeeper_df.Shot_Power = np.where(goalkeeper_df.Shot_Power >= goalkeeper_df.Shot_Power.quantile(q=.95), 'No', 'Yes')
goalkeeper_df.Finishing = np.where(goalkeeper_df.Finishing >= goalkeeper_df.Finishing.quantile(q=.95), 'No', 'Yes')
goalkeeper_df.Long_Shots = np.where(goalkeeper_df.Long_Shots >= goalkeeper_df.Long_Shots.quantile(q=.95), 'No', 'Yes')
goalkeeper_df.Curve = np.where(goalkeeper_df.Curve >= goalkeeper_df.Curve.quantile(q=.95), 'No', 'Yes')
goalkeeper_df.Freekick_Accuracy = np.where(goalkeeper_df.Freekick_Accuracy >= goalkeeper_df.Freekick_Accuracy.quantile(q=.95), 'No', 'Yes')
goalkeeper_df.Penalties = np.where(goalkeeper_df.Penalties >= goalkeeper_df.Penalties.quantile(q=.95), 'No', 'Yes')
goalkeeper_df.Volleys = np.where(goalkeeper_df.Volleys >= goalkeeper_df.Volleys.quantile(q=.95), 'No', 'Yes')
goalkeeper_df.GK_Positioning = np.where(goalkeeper_df.GK_Positioning >= goalkeeper_df.GK_Positioning.quantile(q=.95), 'No', 'Yes')
goalkeeper_df.GK_Diving = np.where(goalkeeper_df.GK_Diving >= goalkeeper_df.GK_Diving.quantile(q=.95), 'No', 'Yes')
goalkeeper_df.GK_Kicking = np.where(goalkeeper_df.GK_Kicking >= goalkeeper_df.GK_Kicking.quantile(q=.95), 'No', 'Yes')
goalkeeper_df.GK_Handling = np.where(goalkeeper_df.GK_Handling >= goalkeeper_df.GK_Handling.quantile(q=.95), 'No', 'Yes')
goalkeeper_df.GK_Reflexes = np.where(goalkeeper_df.GK_Reflexes >= goalkeeper_df.GK_Reflexes.quantile(q=.95), 'No', 'Yes')
In [53]:
display(goalkeeper_df.head(5))
Name Nationality Rating Preffered_Position Skill_Moves Ball_Control Dribbling Marking Sliding_Tackle Standing_Tackle Aggression Reactions Attacking_Position Interceptions Vision Composure Crossing Short_Pass Long_Pass Acceleration Speed Stamina Strength Balance Agility Jumping Heading Shot_Power Finishing Long_Shots Curve Freekick_Accuracy Penalties Volleys GK_Positioning GK_Diving GK_Kicking GK_Handling GK_Reflexes
4 Manuel Neuer Germany 92 Goalkeeper No No No Yes Yes Yes Yes No Yes No No No Yes No No No No No No Yes Yes No No Yes Yes Yes Yes Yes No Yes No No No No No
5 De Gea Spain 90 Goalkeeper No No Yes Yes Yes No Yes No Yes No No Yes Yes Yes Yes Yes Yes Yes Yes Yes Yes Yes No Yes Yes Yes Yes Yes No Yes No No No No No
9 Thibaut Courtois Belgium 89 Goalkeeper No Yes Yes Yes Yes Yes Yes No Yes Yes Yes Yes Yes Yes Yes Yes Yes Yes Yes Yes Yes Yes Yes Yes Yes Yes Yes Yes Yes Yes No No Yes No No
26 Hugo Lloris France 88 Goalkeeper No No Yes Yes Yes Yes Yes No Yes No Yes No Yes Yes Yes No No Yes Yes Yes Yes Yes Yes Yes Yes Yes Yes Yes No Yes No No Yes No No
30 Petr Čech Czech Republic 88 Goalkeeper No Yes Yes Yes Yes Yes Yes No Yes Yes Yes No Yes Yes Yes Yes Yes Yes Yes Yes Yes Yes Yes Yes Yes Yes Yes Yes Yes Yes No No No No No

And now for what you all have been waiting all along.....

In [54]:
from IPython.display import Image
Image("img/MvsR.jpg")
Out[54]:
In [55]:
ronaldo_df = player_attdf.loc[player_attdf["Name"] == "Cristiano Ronaldo"]
ronaldo_df = ronaldo_df.drop(['Name', 'Nationality', 'Rating', 'Preffered_Position'],axis=1)
ronaldo_df
Out[55]:
Skill_Moves Ball_Control Dribbling Marking Sliding_Tackle Standing_Tackle Aggression Reactions Attacking_Position Interceptions Vision Composure Crossing Short_Pass Long_Pass Acceleration Speed Stamina Strength Balance Agility Jumping Heading Shot_Power Finishing Long_Shots Curve Freekick_Accuracy Penalties Volleys GK_Positioning GK_Diving GK_Kicking GK_Handling GK_Reflexes
0 5 93 92 22 23 31 63 96 94 29 85 86 84 83 77 91 92 92 80 63 90 95 85 92 93 90 81 76 85 88 14 7 15 11 11
In [56]:
messi_df = player_attdf.loc[player_attdf["Name"] == "Lionel Messi"]
messi_df = messi_df.drop(['Name', 'Nationality', 'Rating', 'Preffered_Position'],axis=1)
messi_df
Out[56]:
Skill_Moves Ball_Control Dribbling Marking Sliding_Tackle Standing_Tackle Aggression Reactions Attacking_Position Interceptions Vision Composure Crossing Short_Pass Long_Pass Acceleration Speed Stamina Strength Balance Agility Jumping Heading Shot_Power Finishing Long_Shots Curve Freekick_Accuracy Penalties Volleys GK_Positioning GK_Diving GK_Kicking GK_Handling GK_Reflexes
1 4 95 97 13 26 28 48 95 93 22 90 94 77 88 87 92 87 74 59 95 90 68 71 85 95 88 89 90 74 85 14 6 15 11 8
In [58]:
#%matplotlib notebook
# Data and attributes
attributes = ['Skill Moves', 'Ball Control', 'Dribbling', 'Marking', 'Sliding Tackle',
       'Standing Tackle', 'Aggression', 'Reactions', 'Attacking Position',
       'Interceptions', 'Vision', 'Composure', 'Crossing', 'Short Pass',
       'Long Pass', 'Acceleration', 'Speed', 'Stamina', 'Strength', 'Balance',
       'Agility', 'Jumping', 'Heading', 'Shot Power', 'Finishing',
       'Long Shots', 'Curve', 'Freekick Accuracy', 'Penalties', 'Volleys',
       'GK Positioning', 'GK Diving', 'GK Kicking', 'GK Handling',
       'GK Reflexes']
ronaldo = np.array([5,93,92,22,23,31,63,96,94,29,85,86,84,83,77,91,92,92,80,63,90,95,85,92,93,90,81,76,85,88,14,7,15,11,11])
messi = np.array([4,95,97,13,26,28,48,95,93,22,90,94,77,88,87,92,87,74,59,95,90,68,71,85,95,88,89,90,74,85,14,6,15,11,8])

idx = ronaldo.argsort()
attributes, ronaldo, messi = [np.take(x, idx) for x in [attributes, ronaldo, messi]]

y = np.arange(messi.size)

fig, axes = plt.subplots(ncols=2, sharey=True, figsize=(15,10))
ax0 = axes[0].barh(y, ronaldo, align='center', color='#00E676', zorder=10)
axes[0].set(title='Ronaldo')
ax1 = axes[1].barh(y, messi, align='center', color='#FF8F00', zorder=10)
axes[1].set(title='Messi')

axes[0].invert_xaxis()
axes[0].set(yticks=y, yticklabels=attributes)
axes[0].set_yticklabels(attributes, fontweight='bold', fontsize=8)
axes[0].yaxis.tick_right()

#for messi
for ax0 in axes.flat:
    ax0.margins(0.03)
    ax0.grid(True)   
    '''for i, v in enumerate(messi):
        ax0.text(v , i, str(v), color='black', fontweight='bold')'''
    for i in ax0.patches:
    # get_width pulls left or right; get_y pushes up or down
        ax0.text(i.get_width()+.1, i.get_y()+.31, \
            str((round(i.get_width()))), fontsize=8, fontweight='bold', color='black')

#for ronaldo        
for ax0 in axes.flat:
    ax0.margins(0.03)
    ax0.grid(True)
    for i, v in enumerate(ronaldo):
        ax0.text(v+3 , i, str(v), color='black', fontweight='bold')
fig.tight_layout()
fig.subplots_adjust(wspace=0.2)
plt.style.use('ggplot')
plt.show()
In [60]:
colors = ['green','red']             
chosen_players = ['Cristiano Ronaldo','Lionel Messi']


attributes = ['Ball_Control','Dribbling','Marking','Aggression','Reactions', 'Attacking_Position',
       'Interceptions', 'Vision', 'Composure', 'Crossing', 'Short_Pass',
       'Long_Pass', 'Acceleration', 'Speed', 'Stamina', 'Strength', 'Balance',
       'Agility', 'Jumping', 'Heading', 'Shot_Power', 'Finishing',
       'Long_Shots', 'Curve', 'Freekick_Accuracy', 'Penalties', 'Volleys'
        ]
ranges = [[2**-20, df[attr].max()] for attr in attributes]
datas = df[attributes].values

fig = plt.figure(figsize=(25,25))
radar = RadarChart(fig,attributes,ranges)
for player,data,color in zip(chosen_players,datas,colors):
    radar.plot(data,color = color,label=player)  
    radar.legend(loc = 1, fontsize = 'large')
    radar.fill(data, alpha = 0.1, color = color)
plt.show()

Best Rated Players in FIFA 17

In [61]:
print("no. of players having rating >=90 in FIFA 17:")
print((df.loc[:,'Rating'] > 89).sum())
print("Their Names: ")
bestRated_players = df[df.loc[:,'Rating']>89]#[['Name','Club','Rating']]    #Ronaldo is the winner as per the Ratings. 2 GKs are present too
bestRated_players
no. of players having rating >=90 in FIFA 17:
9
Their Names: 
Out[61]:
Name Nationality National_Position National_Kit Club Club_Position Club_Kit Club_Joining Contract_Expiry Rating Height Weight Preffered_Foot Birth_Date Age Preffered_Position Work_Rate Weak_foot Skill_Moves Ball_Control Dribbling Marking Sliding_Tackle Standing_Tackle Aggression Reactions Attacking_Position Interceptions Vision Composure Crossing Short_Pass Long_Pass Acceleration Speed Stamina Strength Balance Agility Jumping Heading Shot_Power Finishing Long_Shots Curve Freekick_Accuracy Penalties Volleys GK_Positioning GK_Diving GK_Kicking GK_Handling GK_Reflexes
0 Cristiano Ronaldo Portugal LS 7.0 Real Madrid LW 7.0 07-01-2009 2021.0 94 185 cm 80 kg Right 02-05-1985 32 LW/ST High / Low 4 5 93 92 22 23 31 63 96 94 29 85 86 84 83 77 91 92 92 80 63 90 95 85 92 93 90 81 76 85 88 14 7 15 11 11
1 Lionel Messi Argentina RW 10.0 FC Barcelona RW 10.0 07-01-2004 2018.0 93 170 cm 72 kg Left 06/24/1987 29 RW Medium / Medium 4 4 95 97 13 26 28 48 95 93 22 90 94 77 88 87 92 87 74 59 95 90 68 71 85 95 88 89 90 74 85 14 6 15 11 8
2 Neymar Brazil LW 10.0 FC Barcelona LW 11.0 07-01-2013 2021.0 92 174 cm 68 kg Right 02-05-1992 25 LW High / Medium 5 5 95 96 21 33 24 56 88 90 36 80 80 75 81 75 93 90 79 49 82 96 61 62 78 89 77 79 84 81 83 15 9 15 9 11
3 Luis Suárez Uruguay LS 9.0 FC Barcelona ST 9.0 07-11-2014 2021.0 92 182 cm 85 kg Right 01/24/1987 30 ST High / Medium 4 4 91 86 30 38 45 78 93 92 41 84 83 77 83 64 88 77 89 76 60 86 69 77 87 94 86 86 84 85 88 33 27 31 25 37
4 Manuel Neuer Germany GK 1.0 FC Bayern GK 1.0 07-01-2011 2021.0 92 193 cm 92 kg Right 03/27/1986 31 GK Medium / Medium 4 1 48 30 10 11 10 29 85 12 30 70 70 15 55 59 58 61 44 83 35 52 78 25 25 13 16 14 11 47 11 91 89 95 90 89
5 De Gea Spain GK 1.0 Manchester Utd GK 1.0 07-01-2011 2019.0 90 193 cm 82 kg Right 11-07-1990 26 GK Medium / Medium 3 1 31 13 13 13 21 38 88 12 30 68 60 17 31 32 56 56 25 64 43 57 67 21 31 13 12 21 19 40 13 86 88 87 85 90
6 Robert Lewandowski Poland LS 9.0 FC Bayern ST 9.0 07-01-2014 2021.0 90 185 cm 79 kg Right 08/21/1988 28 ST High / Medium 4 3 87 85 25 19 42 80 88 89 39 78 87 62 83 65 79 82 79 84 79 78 84 85 86 91 82 77 76 81 86 8 15 12 6 10
7 Gareth Bale Wales RS 11.0 Real Madrid RW 11.0 09-02-2013 2022.0 90 183 cm 74 kg Left 07/16/1989 27 RW High / Medium 3 4 88 89 51 52 55 65 87 86 59 79 85 87 86 80 93 95 78 80 65 77 85 86 91 87 90 86 85 76 76 5 15 11 15 6
8 Zlatan Ibrahimović Sweden NaN NaN Manchester Utd ST 9.0 07-01-2016 2017.0 90 195 cm 95 kg Right 10-03-1981 35 ST Medium / Low 4 4 90 87 15 27 41 84 85 86 20 83 91 76 84 76 69 74 75 93 41 86 72 80 93 90 88 82 82 91 93 9 13 10 15 12

Using Radar chart

In [59]:
def _scale_data(datas,ranges): #data of 1 row is provided, with every column ranges
    
    (x1,x2) = ranges[0]
    diff = x2-x1
    fact = 0
    scaled_data = []
    for data, (y1,y2) in zip(datas,ranges):
        
        fact = ((data-y1)/(y2-y1))*diff
        scaled_data.append(fact+x1)
    return scaled_data

'''
class RadarChart
'''

class RadarChart():
    
    def __init__(self,fig,attributes,ranges,n_ordinate_levels=6):
        #n_ordinate_levels is for grid scaping, attributes being features, ranges is a list of all feature ranges
        angles = np.arange(0, 360, 360./len(attributes))
        
        '''
        Add an axes at position rect [left, bottom, width, height] where all quantities are in fractions of figure width and height. 
        kwargs are legal Axes kwargs plus projection which sets the projection type of the axes. 
        (For backward compatibility, polar=True may also be provided, which is equivalent to projection='polar'). Valid values for projection are [‘aitoff’, ‘hammer’, ‘lambert’, ‘mollweide’, ‘polar’, ‘rectilinear’].
        Some of these projections support additional kwargs, which may be provided to add_axes(). 
        
        '''

        
        axes = [fig.add_axes([0.1,0.1,0.8,0.8],polar=True,label="axis{}".format(i)) for i in range(len(attributes))] #we use label to distinguish b/w different axes, its a must, else we dont get different axes!
            
        _,text = axes[0].set_thetagrids(angles,labels = attributes)
        
        '''
        Set the angles at which to place the theta grids (these
        gridlines are equal along the theta dimension).  *angles* is in
        degrees.
        Return value is a list of tuples (*line*, *label*), where   #here (_,text)
        *line* is :class:`~matplotlib.lines.Line2D` instances and the  
        *label* is :class:`~matplotlib.text.Text` instances.    
        '''
        
        for txt,angle in zip(text,angles):
            txt.set_rotation(angle-90)
            txt.set_size(15)
            
        '''
        Axes contains a Patch patch which is a Rectangle for Cartesian coordinates and a Circle for polar coordinates;
        this patch determines the shape, background and border of the plotting region;
        '''    
        
        for ax in axes[1:]:
            ax.patch.set_visible(False)  #patch is background
            ax.xaxis.set_visible(False)
            ax.grid("off")
        
        for i,ax in enumerate(axes):
            
            grid = np.linspace(*ranges[i],num = n_ordinate_levels)
            grid_label = [""] + [str(int(x)) for x in grid[1:]]                 ## circular grid label, from inner circle to outer circle
            ax.set_rgrids(grid,labels = grid_label,angle=angles[i])              ##rgrid is radia grids
                                                                                #grid here requires, starting circle distances, last circle dist, no. of circles i.e. *ranges[i],n_ordinate_levels
            ax.set_ylim(*ranges[i])
        
        self.angle = np.deg2rad(np.r_[angles,angles[0]])
        self.ranges  = ranges
        self.ax = axes[0]
        
    def plot(self,data,*args,**kwargs):                                    ##for plotting lines
        scaled_data = _scale_data(data,self.ranges)
        self.ax.plot(self.angle,np.r_[scaled_data,scaled_data[0]],*args,**kwargs)
            
    def fill(self,data,*args,**kwargs):
        scaled_data = _scale_data(data,self.ranges)                             ##for filling those lines generated polygon with color
        self.ax.fill(self.angle,np.r_[scaled_data,scaled_data[0]],*args,**kwargs)
        
    def legend(self,*args,**kwargs):                                       ##for labelling row name, i.e about self
        self.ax.legend(*args,**kwargs)
    
        
            
'''
class over 
'''
Out[59]:
'\nclass over \n'
In [62]:
attributes = ['Ball_Control','Dribbling','Marking','Aggression','Reactions', 'Attacking_Position',
       'Interceptions', 'Vision', 'Composure', 'Crossing', 'Short_Pass',
       'Long_Pass', 'Acceleration', 'Speed', 'Stamina', 'Strength', 'Balance',
       'Agility', 'Jumping', 'Heading', 'Shot_Power', 'Finishing',
       'Long_Shots', 'Curve', 'Freekick_Accuracy', 'Penalties', 'Volleys',
       'GK_Positioning', 'GK_Diving', 'GK_Kicking', 'GK_Handling',
       'GK_Reflexes']
ranges = [[2**-20, df[attr].max()] for attr in attributes]
top_players = bestRated_players['Name'].unique().tolist()
datas = df[attributes].values
print(datas.shape)
print(type(datas))
print(type(top_players))
colors = sns.hls_palette(n_colors=len(top_players))
print(type(colors))
top_players
(17588, 32)
<class 'numpy.ndarray'>
<class 'list'>
<class 'seaborn.palettes._ColorPalette'>
Out[62]:
['Cristiano Ronaldo',
 'Lionel Messi',
 'Neymar',
 'Luis Suárez',
 'Manuel Neuer',
 'De Gea',
 'Robert Lewandowski',
 'Gareth Bale',
 'Zlatan Ibrahimović']
In [63]:
fig = plt.figure(figsize=(25,25))
radar = RadarChart(fig,attributes,ranges)
for player,data,color in zip(top_players,datas,colors):
    radar.plot(data,color = color,label=player)  
    radar.legend(loc = 1, fontsize = 'large')
    radar.fill(data, alpha = 0.1, color = color)
plt.show()

Miscellaneous - How do player attributes fare with Age of a player

Comparing Rating, Skill moves, Strength, Aggression, Reactions, Vision, Speed, Stamina, Balance and Agility with Age of a player

All the above attributes are in general related to human body and its phases across different age, It is really interesting to see how a professional football players body changes over the years

In [64]:
sns.pairplot(data=df, hue='Age', vars=['Rating', 'Skill_Moves', 'Strength','Aggression', 'Reactions'])
Out[64]:
<seaborn.axisgrid.PairGrid at 0x1c25491fe48>
In [65]:
sns.pairplot(data=df, hue='Age', vars=['Vision', 'Speed', 'Stamina', 'Balance', 'Agility'])
Out[65]:
<seaborn.axisgrid.PairGrid at 0x1c259fbdfd0>